<?php
// Only surface fatal errors, warnings and parse errors; notices stay hidden.
error_reporting(E_PARSE | E_WARNING | E_ERROR);

// functions.php presumably supplies imageDownloader()/isAnimatedImage()/IMAGE_PATH
// (none of them are defined in this file — confirm); simple_html_dom.php
// supplies str_get_html().
require 'C:/xampp/htdocs/gag-cgt/includes/functions.php';
require 'C:/xampp/htdocs/gag-cgt/includes/simple_html_dom.php';

// Base URL of the "latest posts" listing; the crawl loop at the bottom of the
// file appends the page number to it.
$latestUrl = 'http://suong.vn/danh-muc/moi-nhat';
/**
 * Download a page with cURL, sending browser-like request headers.
 *
 * The header list is rebuilt on every call so that repeated requests never
 * accumulate duplicate header lines.
 *
 * @param string $url Absolute URL to fetch.
 * @return string|false Response body, or false when the transfer failed.
 */
function suongvnFetchPage($url){
	$header = array(
		"Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
		"Cache-Control: max-age=0",
		"Connection: keep-alive",
		"Keep-Alive: 300",
		"Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
		"Accept-Language: en-us,en;q=0.5",
		"Pragma: ", // browsers keep this blank.
	);

	$ch = curl_init();
	curl_setopt($ch, CURLOPT_URL, $url);
	curl_setopt($ch, CURLOPT_HEADER, 0);
	curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
	// Return the body as a string instead of printing it.
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
	$body = curl_exec($ch);
	curl_close($ch);

	return $body;
}

/**
 * Fetch a post's detail page and pull the video id out of the embedded iframe.
 *
 * The id is the path segment between "embed/" and the following "?" in the
 * iframe's src attribute.
 *
 * @param string $detailUrl Absolute URL of the post's detail page.
 * @return string The video id, or '' when the page, the iframe or the id
 *                could not be found.
 */
function suongvnExtractVideoCode($detailUrl){
	$body = suongvnFetchPage($detailUrl);
	if($body === false || trim($body) == '') return '';

	$detail = str_get_html($body);
	if(!$detail) return '';

	$videoBox = $detail->find('div.video-content', 0);
	$iframe = $videoBox ? $videoBox->find('iframe', 0) : null;
	if(!$iframe) return '';

	if(!preg_match("/embed\/.*?\?/", $iframe->src, $match)) return '';

	// $match[0] looks like "embed/<id>?"; keep only "<id>".
	$parts = explode("/", $match[0]);
	return str_replace('?', '', $parts[1]);
}

/**
 * Crawl one listing page and store every post found on it.
 *
 * For each ".content-left" element that has a title link, a post link and a
 * thumbnail, the thumbnail is downloaded through imageDownloader() and a row
 * is inserted into the `data` table. Posts carrying a ".cunach-icon" marker
 * are stored as videos, with the video id read from their detail page;
 * photos whose downloaded file is animated are stored as "gif".
 *
 * Progress is echoed to standard output. Nothing is returned; posts with
 * missing markup, a failed download or a failed insert are skipped.
 *
 * NOTE(review): this relies on the ext/mysql API (mysql_query etc.), which no
 * longer exists in PHP 7+. The connection is opened outside this function, so
 * a migration has to start there.
 *
 * @param string $latestUrl URL of the listing page to crawl.
 * @return void
 */
function getSuongvnPost($latestUrl){
	$responseContent = suongvnFetchPage($latestUrl);
	if($responseContent === false || trim($responseContent) == '') return;

	$html = str_get_html($responseContent);
	if(!$html) return;

	foreach($html->find('.content-left') as $element){
		$h1Object = $element->find('h2', 0);
		if(!$h1Object) continue;

		// Every lookup can come back empty on unexpected markup; skip the
		// element instead of dying on a null dereference.
		$aTitle = $h1Object->find('a', 0);
		$postContain = $element->find('.post-container', 0);
		$linkObj = $postContain ? $postContain->find('a', 0) : null;
		$imgObj = $linkObj ? $linkObj->find('img', 0) : null;
		if(!$aTitle || !$linkObj || !$imgObj) continue;

		$postTitle = strip_tags($aTitle->innertext);
		$postLink = $linkObj->href;
		$postImage = $imgObj->src;

		echo "----------------------\nCrawling page /$postLink:\n";

		if(trim($postTitle) == '' || trim($postImage) == '' || trim($postLink) == '') continue;

		if($postContain->find('.cunach-icon', 0)){
			$postType = 'video';
			// The listing links are site-relative, hence the fixed prefix.
			$videoCode = suongvnExtractVideoCode('http://suong.vn'.$postLink);
		}
		else{
			$postType = 'photo';
			$videoCode = '';
		}

		$unique = md5($postImage);
		// end() takes its argument by reference, so it needs a real variable.
		$linkParts = explode('/', $postLink);
		echo "\tcrawling [".end($linkParts)."] .. ";

		$fileInfo = imageDownloader($postImage);
		if(!$fileInfo) continue;

		if($postType == 'photo' && isAnimatedImage(IMAGE_PATH.$fileInfo['name'])){
			$postType = 'gif';
		}

		// Resolve the source host from the raw link; relative links carry no
		// host, so fall back to the host of the listing page itself.
		$sourceHost = parse_url($postLink, PHP_URL_HOST);
		if(!$sourceHost){
			$sourceHost = parse_url($latestUrl, PHP_URL_HOST);
		}

		// Every value originates from remote content or the downloader, so
		// each one is escaped before being placed into the statement.
		$sqlInsert = sprintf(
			"INSERT INTO data(original_url, post_title, post_image, post_unique, crawl_date, file_checksum,item_type,video_code,source_page) VALUES('%s', '%s', '%s', '%s', NOW(), '%s', '%s', '%s', '%s')",
			mysql_real_escape_string($postLink),
			mysql_real_escape_string(strip_tags($postTitle)),
			mysql_real_escape_string($fileInfo['name']),
			mysql_real_escape_string($unique),
			mysql_real_escape_string($fileInfo['checksum']),
			mysql_real_escape_string($postType),
			mysql_real_escape_string($videoCode),
			mysql_real_escape_string((string)$sourceHost)
		);

		if(mysql_query($sqlInsert)){
			echo "\tdone\n";
		}
		else{
			echo "\tfailed -> \n".mysql_error()."\n\n";
		}
	}
}
// dbconnect.php presumably opens the MySQL connection that getSuongvnPost()
// writes through — confirm against that file.
require 'C:/xampp/htdocs/gag-cgt/dbconnect.php';

// Crawl the listing pages one after another, waiting five seconds after each.
// NOTE(review): the page number is glued straight onto $latestUrl with no
// separator (".../moi-nhat1", ".../moi-nhat2", ...) — confirm this matches the
// site's paging URLs.
$numPage = 20;
$i = 1;
while($i <= $numPage){
	echo "----------------------\nCrawling page $i/$numPage:\n";
	getSuongvnPost($latestUrl.$i);
	sleep(5);
	$i++;
}

echo "\n----------------------[DONE]----------------------\n";